{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c2b92e14-aa49-4b4c-b0c3-af07314eb8a7",
   "metadata": {},
   "source": [
    "# Supplementary material for Q11"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a99e7ac1-9681-45c1-a4ef-715491d22828",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "PyTorch version: 2.0.1\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "print(f\"PyTorch version: {torch.__version__}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "94a0ce0c-2b97-4c1b-a1a8-8edaa547c5e1",
   "metadata": {},
   "source": [
    "## 1) Convolutional neural network architecture"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "c0cfee5b-2923-4f52-8e82-7ea0a306bcc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "class PyTorchCNN(torch.nn.Module):\n",
    "    def __init__(self, num_classes):\n",
    "        super().__init__()\n",
    "\n",
    "        self.num_classes = num_classes\n",
    "        self.features = torch.nn.Sequential(\n",
    "            torch.nn.Conv2d(3, 5, kernel_size=5, stride=1), # 5 * (5*5 * 3) + 5\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.MaxPool2d(kernel_size=5, stride=2),\n",
    "            torch.nn.Conv2d(5, 12, kernel_size=3, stride=1),  # 12 * (3*3 * 5) + 12\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.AvgPool2d(kernel_size=3, stride=2),\n",
    "            torch.nn.ReLU(),\n",
    "        )\n",
    "\n",
    "        self.classifier = torch.nn.Sequential(\n",
    "            torch.nn.Flatten(),\n",
    "            torch.nn.Linear(192, 128), # 192 * 128 + 128\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(128, num_classes), # 128 * 10 + 10\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.features(x)\n",
    "        x = self.classifier(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "811d96c1-2269-4cc5-9b40-d3ddbc872693",
   "metadata": {},
   "source": [
    "## 2) Computing the number of parameters"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8473aea9-c497-46d6-8374-d4c8e044ed31",
   "metadata": {},
   "source": [
    "### 2 a) By hand"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "6e0a7164-846f-4054-9493-ccacc519ee51",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "932\n"
     ]
    }
   ],
   "source": [
    "# convolutional part\n",
    "conv_part =  (5 * (5*5 * 3) + 5 ) + ( 12 * (3*3 * 5) + 12 )\n",
    "print(conv_part)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "313597c8-3295-4373-b0dc-f6273733afc9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25994\n"
     ]
    }
   ],
   "source": [
    "# fully connected part\n",
    "fc_part = 192*128+128 + 128*10+10\n",
    "print(fc_part)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "a7d77806-33c1-4b0a-bf29-22643ec4938f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26926\n"
     ]
    }
   ],
   "source": [
    "# total\n",
    "print(conv_part + fc_part)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dd5e430f-1f76-477c-86e7-22372d6dbbdd",
   "metadata": {},
   "source": [
    "### 2 b) Adding .parameters() programmatically"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "d1f1d38c-d19e-41bb-b8b2-c9cfa19a8b42",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = PyTorchCNN(10)\n",
    "\n",
    "def count_parameters(model): \n",
    "    return sum(p.numel() for p in model.parameters() if p.requires_grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "id": "9c6f9f5f-422e-4154-9db8-335bab3e5a7a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "932"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# convolutional part\n",
    "count_parameters(model.features)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "413ec8c3-e557-4304-9798-d227c0091adc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25994"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# fully connected part\n",
    "count_parameters(model.classifier)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "5c6bbda1-16a5-4e04-b0f0-6cb65e550824",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26926"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# total\n",
    "count_parameters(model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "417c1976-4c5b-49bd-954f-b62819c9be64",
   "metadata": {},
   "source": [
    "### 2 c) Computer the memory size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 97,
   "id": "ac9ca70a-e239-4ecb-bacf-247ed08e5ce4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 0.11 Mb\n"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "\n",
    "def calculate_size(model): \n",
    "    return sum(p.element_size()*p.numel() for p in model.parameters())\n",
    "\n",
    "size_in_bytes = calculate_size(model)\n",
    "size_in_megabytes = size_in_bytes * 1e-6\n",
    "\n",
    "print(f\"{size_in_megabytes: .2f} Mb\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f4c99a55-0b72-4247-8d1b-d06ea4726b32",
   "metadata": {},
   "source": [
    "### 2 d) Using the torchinfo library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "id": "2967e0b4-692e-4858-a8e3-3586b1744481",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Torchinfo version: 1.7.2\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "==========================================================================================\n",
       "Layer (type:depth-idx)                   Output Shape              Param #\n",
       "==========================================================================================\n",
       "PyTorchCNN                               [16, 10]                  --\n",
       "├─Sequential: 1-1                        [16, 12, 4, 4]            --\n",
       "│    └─Conv2d: 2-1                       [16, 5, 28, 28]           380\n",
       "│    └─ReLU: 2-2                         [16, 5, 28, 28]           --\n",
       "│    └─MaxPool2d: 2-3                    [16, 5, 12, 12]           --\n",
       "│    └─Conv2d: 2-4                       [16, 12, 10, 10]          552\n",
       "│    └─ReLU: 2-5                         [16, 12, 10, 10]          --\n",
       "│    └─AvgPool2d: 2-6                    [16, 12, 4, 4]            --\n",
       "│    └─ReLU: 2-7                         [16, 12, 4, 4]            --\n",
       "├─Sequential: 1-2                        [16, 10]                  --\n",
       "│    └─Flatten: 2-8                      [16, 192]                 --\n",
       "│    └─Linear: 2-9                       [16, 128]                 24,704\n",
       "│    └─ReLU: 2-10                        [16, 128]                 --\n",
       "│    └─Linear: 2-11                      [16, 10]                  1,290\n",
       "==========================================================================================\n",
       "Total params: 26,926\n",
       "Trainable params: 26,926\n",
       "Non-trainable params: 0\n",
       "Total mult-adds (M): 6.07\n",
       "==========================================================================================\n",
       "Input size (MB): 0.20\n",
       "Forward/backward pass size (MB): 0.67\n",
       "Params size (MB): 0.11\n",
       "Estimated Total Size (MB): 0.98\n",
       "=========================================================================================="
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# using https://github.com/TylerYep/torchinfo\n",
    "# pip install torchinfo\n",
    "\n",
    "import torchinfo\n",
    "\n",
    "print(f\"Torchinfo version: {torchinfo.__version__}\")\n",
    "\n",
    "batch_size = 16\n",
    "torchinfo.summary(model, input_size=(batch_size, 3, 32, 32))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4cedef9f-1d7a-4edf-854c-75cdb967f55e",
   "metadata": {},
   "source": [
    "## 3) ADAM optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "0a967f58-2808-4c66-b6dc-1493c50c263c",
   "metadata": {},
   "outputs": [],
   "source": [
    "optimizer = torch.optim.Adam(model.parameters())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "0b1af818-0275-488d-a08f-c6d6a5a267ed",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<function list.count(value, /)>"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "optimizer.param_groups.count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "538c2a18-89f9-422c-80d5-7afe9abf95c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26926"
      ]
     },
     "execution_count": 104,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(p.numel() for p in optimizer.param_groups[0]['params'])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c910ddaf-ceb9-4e0c-830b-c69c12a87b61",
   "metadata": {},
   "source": [
    "## 4) BatchNorm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "8ff1530f-df95-4201-9cb3-b1392752590c",
   "metadata": {},
   "outputs": [],
   "source": [
    "class PyTorchCNN(torch.nn.Module):\n",
    "    def __init__(self, num_classes):\n",
    "        super().__init__()\n",
    "\n",
    "        self.num_classes = num_classes\n",
    "        self.features = torch.nn.Sequential(\n",
    "            torch.nn.Conv2d(3, 5, kernel_size=5, stride=1),\n",
    "            torch.nn.BatchNorm2d(5),  # NEW!\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.MaxPool2d(kernel_size=5, stride=2),\n",
    "            torch.nn.Conv2d(5, 12, kernel_size=3, stride=1),\n",
    "            torch.nn.BatchNorm2d(12),  # NEW!\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.AvgPool2d(kernel_size=3, stride=2),\n",
    "            torch.nn.ReLU(),\n",
    "        )\n",
    "\n",
    "        self.classifier = torch.nn.Sequential(\n",
    "            torch.nn.Flatten(),\n",
    "            torch.nn.Linear(192, 128),\n",
    "            torch.nn.BatchNorm1d(128),  # NEW!\n",
    "            torch.nn.ReLU(),\n",
    "            torch.nn.Linear(128, num_classes),\n",
    "        )\n",
    "\n",
    "    def forward(self, x):\n",
    "        x = self.features(x)\n",
    "        x = self.classifier(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "5561e44b-0d3d-4a19-ac74-f5b033b6bdb1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "==========================================================================================\n",
       "Layer (type:depth-idx)                   Output Shape              Param #\n",
       "==========================================================================================\n",
       "PyTorchCNN                               [16, 10]                  --\n",
       "├─Sequential: 1-1                        [16, 12, 4, 4]            --\n",
       "│    └─Conv2d: 2-1                       [16, 5, 28, 28]           380\n",
       "│    └─BatchNorm2d: 2-2                  [16, 5, 28, 28]           10\n",
       "│    └─ReLU: 2-3                         [16, 5, 28, 28]           --\n",
       "│    └─MaxPool2d: 2-4                    [16, 5, 12, 12]           --\n",
       "│    └─Conv2d: 2-5                       [16, 12, 10, 10]          552\n",
       "│    └─BatchNorm2d: 2-6                  [16, 12, 10, 10]          24\n",
       "│    └─ReLU: 2-7                         [16, 12, 10, 10]          --\n",
       "│    └─AvgPool2d: 2-8                    [16, 12, 4, 4]            --\n",
       "│    └─ReLU: 2-9                         [16, 12, 4, 4]            --\n",
       "├─Sequential: 1-2                        [16, 10]                  --\n",
       "│    └─Flatten: 2-10                     [16, 192]                 --\n",
       "│    └─Linear: 2-11                      [16, 128]                 24,704\n",
       "│    └─BatchNorm1d: 2-12                 [16, 128]                 256\n",
       "│    └─ReLU: 2-13                        [16, 128]                 --\n",
       "│    └─Linear: 2-14                      [16, 10]                  1,290\n",
       "==========================================================================================\n",
       "Total params: 27,216\n",
       "Trainable params: 27,216\n",
       "Non-trainable params: 0\n",
       "Total mult-adds (M): 6.07\n",
       "==========================================================================================\n",
       "Input size (MB): 0.20\n",
       "Forward/backward pass size (MB): 1.34\n",
       "Params size (MB): 0.11\n",
       "Estimated Total Size (MB): 1.65\n",
       "=========================================================================================="
      ]
     },
     "execution_count": 108,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model = PyTorchCNN(10)\n",
    "\n",
    "torchinfo.summary(model, input_size=(batch_size, 3, 32, 32))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2194290-c90b-4716-a515-e1ce3aa2dfb8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
